# Start from an empty workspace: remove every object that ls() lists here.
# NOTE(review): clearing the environment from inside a script is widely
# discouraged; running the file in a fresh R session is the safer alternative.
rm(list = ls())

Libraries

library(tidyverse)
library(colorspace)
library(scales)
library(skimr)
library(lubridate)
library(plotly)
library(gridExtra)
library(patchwork)

Reading and cleaning the data

# Read the mass-shootings CSV and drop its first column by position
# (presumably an exported row index -- TODO confirm), then preview the top rows.
# NOTE(review): the path is absolute and machine-specific.
df2 <- select(
  read_csv("C:/Users/piotr/Desktop/pythonfiles/mass_shootings_usa.csv"),
  -1
)
df2 %>% head()
## # A tibble: 6 × 9
##   incident_date state     city_or_country address victims_killed victims_injured
##   <date>        <chr>     <chr>           <chr>            <dbl>           <dbl>
## 1 2023-12-01    Nevada    Las Vegas       E Char…              1               4
## 2 2023-11-29    Illinois  Chicago         3600 b…              1               3
## 3 2023-11-29    North Ca… Lexington       7000 b…              0               5
## 4 2023-11-26    North Ca… Winston Salem … 3533 N…              0               4
## 5 2023-11-25    Californ… Fontana         15500 …              3               1
## 6 2023-11-23    Tennessee Memphis         2707 K…              0               4
## # ℹ 3 more variables: suspects_killed <dbl>, suspects_injured <dbl>,
## #   suspects_arrested <dbl>
# Colour ramp used by every gradient fill below: 42 colours taken from the
# palette function that colorspace::choose_palette() returns.
# NOTE(review): choose_palette() appears to launch an interactive chooser, so
# the colours depend on what is picked at run time -- confirm this is intended.
pal3 <- colorspace::choose_palette()(n = 42)

# Shared text sizes that each ggplot adds after theme_minimal().
th <- theme(
  plot.title = element_text(size = 18),
  plot.subtitle = element_text(size = 18),
  axis.title.x = element_text(size = 14),
  axis.title.y = element_text(size = 14),
  axis.text.x = element_text(size = 12),
  axis.text.y = element_text(size = 12),
  legend.title = element_text(size = 16),
  legend.text = element_text(size = 12)
)

Plots

Victims killed

Total victims killed by state

# Horizontal bar chart of total victims killed per state.
# States with a total of zero are dropped and the remaining bars are ordered
# by their total. Only `state` and `victims_killed` feed the summary, so the
# month/day columns and positional column reordering are not computed here.
p1 <- df2 %>%
  mutate(state = as.factor(state)) %>%
  group_by(state) %>%
  summarise(sum_victims_killed = sum(victims_killed)) %>%
  arrange(desc(sum_victims_killed)) %>%
  filter(sum_victims_killed > 0) %>%
  # Order the factor levels by total so the bars are sorted on the axis.
  mutate(state = fct_reorder(state, sum_victims_killed)) %>%
  ggplot(
    aes(
      x = state,
      y = sum_victims_killed,
      fill = sum_victims_killed,
      # Custom hover label picked up by ggplotly(tooltip = "text").
      text = paste(
        "State:", state,
        "\nTotal Victims Killed:", sum_victims_killed
      )
    )
  ) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_gradientn(colors = pal3, name = "Total Victims Killed") +
  labs(
    x = "",
    y = "",
    title = "Total Victims Killed by State",
    subtitle = "01/01/2023 - 02/12/2023"
  ) +
  coord_flip() +
  theme_minimal() +
  th

# Interactive version. The layout title is set explicitly to the chart title,
# a line break, and the date range wrapped in a (now properly closed) <sup>.
ggplotly(p1, tooltip = "text") %>%
  layout(
    title = list(
      text = paste0(
        "Total Victims Killed by State",
        "<br>",
        "<sup>",
        "01/01/2023 - 02/12/2023",
        "</sup>"
      )
    )
  )

Total victims killed by month

# Horizontal bar chart of total victims killed per calendar month.
# Only the month label is derived; the weekday column, factor conversion of
# other columns and positional column reordering were unused by this summary.
p2 <- df2 %>%
  mutate(
    incident_month = month(incident_date, label = TRUE, abbr = FALSE)
  ) %>%
  group_by(incident_month) %>%
  summarise(sum_victims_killed = sum(victims_killed)) %>%
  # Drop the December bucket (presumably because that month is incomplete in
  # the data -- TODO confirm).
  filter(incident_month != "December") %>%
  ggplot(
    aes(
      x = incident_month,
      y = sum_victims_killed,
      fill = sum_victims_killed,
      # Custom hover label picked up by ggplotly(tooltip = "text").
      text = paste(
        "Month:", incident_month,
        "\nTotal Victims Killed:", sum_victims_killed
      )
    )
  ) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_gradientn(colors = pal3, name = "Total Victims Killed") +
  labs(
    x = "",
    y = "",
    title = "Total Victims Killed by Month ",
    subtitle = "01/01/2023 - 02/12/2023"
  ) +
  coord_flip() +
  theme_minimal() +
  th

# Interactive version. The layout title is set explicitly to the chart title,
# a line break, and the date range wrapped in a (now properly closed) <sup>.
ggplotly(p2, tooltip = "text") %>%
  layout(
    title = list(
      text = paste0(
        "Total Victims Killed by Month",
        "<br>",
        "<sup>",
        "01/01/2023 - 02/12/2023",
        "</sup>"
      )
    )
  )

Total victims killed by day of week

# Horizontal bar chart of total victims killed per day of the week.
# Only the weekday label is derived; the month column, factor conversion of
# other columns and positional column reordering were unused by this summary.
p3 <- df2 %>%
  mutate(
    # Re-level the weekday labels so the sequence starts on Monday.
    # NOTE(review): the level names are English; this assumes wday() emits
    # English labels in the session locale -- confirm.
    incident_day = factor(
      wday(incident_date, label = TRUE, abbr = FALSE),
      levels = c(
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday"
      )
    )
  ) %>%
  group_by(incident_day) %>%
  summarise(sum_victims_killed = sum(victims_killed)) %>%
  ggplot(
    aes(
      x = incident_day,
      y = sum_victims_killed,
      fill = sum_victims_killed,
      # Custom hover label picked up by ggplotly(tooltip = "text").
      text = paste(
        "Day:", incident_day,
        "\nTotal Victims Killed:", sum_victims_killed
      )
    )
  ) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_gradientn(colors = pal3, name = "Total Victims Killed") +
  labs(
    x = "",
    y = "",
    title = "Total Victims Killed by Day of Week",
    subtitle = "01/01/2023 - 02/12/2023"
  ) +
  coord_flip() +
  theme_minimal() +
  th

# Interactive version. The layout title is set explicitly to the chart title,
# a line break, and the date range wrapped in a (now properly closed) <sup>.
ggplotly(p3, tooltip = "text") %>%
  layout(
    title = list(
      text = paste0(
        "Total Victims Killed by Day of Week",
        "<br>",
        "<sup>",
        "01/01/2023 - 02/12/2023",
        "</sup>"
      )
    )
  )

Victims killed - line plots

# Line chart of `victims_killed` against `incident_date`, one point per row of
# df2. Hover text shows the date and the count; the date range is added as an
# annotation at paper coordinates (0, 1).
p6 <- df2 %>%
  plot_ly(
    x = ~incident_date,
    y = ~victims_killed,
    type = "scatter",
    mode = "lines",
    line = list(color = "#CA3242", width = 2),
    text = ~ paste0(
      "Date: ", incident_date, "\n",
      "Victims Killed: ", victims_killed
    )
  ) %>%
  layout(
    title = list(text = "Victims Killed", font = list(size = 20), y = 1),
    xaxis = list(title = "", tickfont = list(size = 18)),
    yaxis = list(title = "", tickfont = list(size = 18)),
    showlegend = FALSE
  ) %>%
  add_annotations(
    text = "01/01/2023-02/12/2023",
    showarrow = FALSE,
    xref = "paper",
    yref = "paper",
    x = 0,
    y = 1,
    font = list(size = 18)
  ) %>%
  # Show only the custom text on hover.
  style(hoverinfo = "text")

p6
# Monthly totals of victims killed, keyed by a "YYYY-MM" factor built from the
# first seven characters of the incident date. The weekday/month labels and
# positional column reordering were unused by this summary and are omitted.
df_sum_killed <- df2 %>%
  mutate(
    incident_date2 = as.factor(
      str_sub(as.character(incident_date), start = 1, end = 7)
    )
  ) %>%
  group_by(incident_date2) %>%
  summarise(sum_victims_killed = sum(victims_killed)) %>%
  # Drop the 2023-12 bucket (presumably because that month is incomplete in
  # the data -- TODO confirm).
  filter(incident_date2 != "2023-12")

# Line chart of the monthly totals. `xref`/`yref` are now passed once each to
# add_annotations(); the original supplied both of them twice.
plot_ly(
  data = df_sum_killed,
  x = ~incident_date2,
  y = ~sum_victims_killed,
  type = "scatter",
  mode = "lines",
  line = list(color = "#CA3242", width = 2),
  text = ~ paste0(
    "Date: ", incident_date2, "\n",
    "Total victims killed: ", sum_victims_killed
  )
) %>%
  layout(
    title = list(text = "Total Victims Killed", font = list(size = 24), y = 1),
    xaxis = list(title = "", tickfont = list(size = 18)),
    yaxis = list(title = "", tickfont = list(size = 18)),
    showlegend = FALSE
  ) %>%
  add_annotations(
    text = "01/01/2023 - 02/12/2023",
    showarrow = FALSE,
    xref = "paper",
    yref = "paper",
    x = 0,
    y = 1,
    font = list(size = 16)
  ) %>%
  # Show only the custom text on hover.
  style(hoverinfo = "text")

Victims killed - stripchart, boxplot and density plot

# Three stacked base-graphics views of `victims_killed`: jittered stripchart,
# horizontal boxplot and kernel density. The previous `mfrow` setting is saved
# and restored afterwards so later base plots are not forced into a 3x1 grid.
old_par <- par(mfrow = c(3, 1))
stripchart(
  df2$victims_killed,
  method = "jitter",
  vertical = FALSE,
  cex = 7,
  cex.main = 2,
  cex.axis = 2,
  col = "#CA3242",
  main = "Victims Killed Stripchart"
)
boxplot(
  df2$victims_killed,
  cex = 7,
  cex.axis = 2,
  cex.main = 2,
  col = "#CA3242",
  main = "Victims Killed Boxplot",
  horizontal = TRUE
)
plot(
  density(df2$victims_killed),
  col = "#CA3242",
  cex = 7,
  cex.axis = 2,
  cex.main = 2,
  cex.lab = 2,
  main = "Victims Killed Densityplot",
  lwd = 3
)
par(old_par)

Victims injured

# Horizontal bar chart of total victims injured per state.
# States with a total of zero are dropped and the remaining bars are ordered
# by their total. Only `state` and `victims_injured` feed the summary, so the
# month/day columns and positional column reordering are not computed here.
p4 <- df2 %>%
  mutate(state = as.factor(state)) %>%
  group_by(state) %>%
  summarise(sum_victims_injured = sum(victims_injured)) %>%
  arrange(desc(sum_victims_injured)) %>%
  filter(sum_victims_injured > 0) %>%
  # Order the factor levels by total so the bars are sorted on the axis.
  mutate(state = fct_reorder(state, sum_victims_injured)) %>%
  ggplot(
    aes(
      x = state,
      y = sum_victims_injured,
      fill = sum_victims_injured,
      # Custom hover label picked up by ggplotly(tooltip = "text").
      text = paste(
        "State:", state,
        "\nTotal Victims Injured:", sum_victims_injured
      )
    )
  ) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_gradientn(colors = pal3, name = "Total Victims Injured") +
  labs(
    x = "",
    y = "",
    # Title now matches the interactive title set in layout() below
    # (the static plot previously said "Sum of ...").
    title = "Total Victims Injured by State",
    subtitle = "01/01/2023 - 02/12/2023"
  ) +
  coord_flip() +
  theme_minimal() +
  th

# Interactive version. The layout title is set explicitly to the chart title,
# a line break, and the date range wrapped in a (now properly closed) <sup>.
ggplotly(p4, tooltip = "text") %>%
  layout(
    title = list(
      text = paste0(
        "Total Victims Injured by State",
        "<br>",
        "<sup>",
        "01/01/2023 - 02/12/2023",
        "</sup>"
      )
    )
  )

Total victims injured by month

# Horizontal bar chart of total victims injured per calendar month.
# Only the month label is derived; the weekday column, factor conversion of
# other columns and positional column reordering were unused by this summary.
p5 <- df2 %>%
  mutate(
    incident_month = month(incident_date, label = TRUE, abbr = FALSE)
  ) %>%
  group_by(incident_month) %>%
  summarise(sum_victims_injured = sum(victims_injured)) %>%
  # Drop the December bucket (presumably because that month is incomplete in
  # the data -- TODO confirm).
  filter(incident_month != "December") %>%
  ggplot(
    aes(
      x = incident_month,
      y = sum_victims_injured,
      fill = sum_victims_injured,
      # Custom hover label picked up by ggplotly(tooltip = "text").
      text = paste(
        "Month:", incident_month,
        "\nTotal Victims Injured:", sum_victims_injured
      )
    )
  ) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_gradientn(colors = pal3, name = "Total Victims Injured") +
  labs(
    x = "",
    y = "",
    title = "Total Victims Injured by Month",
    subtitle = "01/01/2023 - 02/12/2023"
  ) +
  coord_flip() +
  theme_minimal() +
  th

# Interactive version. The layout title is set explicitly to the chart title,
# a line break, and the date range wrapped in a (now properly closed) <sup>.
ggplotly(p5, tooltip = "text") %>%
  layout(
    title = list(
      text = paste0(
        "Total Victims Injured by Month",
        "<br>",
        "<sup>",
        "01/01/2023 - 02/12/2023",
        "</sup>"
      )
    )
  )

Total victims injured by day of week

# Horizontal bar chart of total victims injured per day of the week.
# Only the weekday label is derived; the month column, factor conversion of
# other columns and positional column reordering were unused by this summary.
# NOTE(review): this reuses the name `p5`, overwriting the by-month chart
# object of the same name; the name is kept so existing references still work.
p5 <- df2 %>%
  mutate(
    # Re-level the weekday labels so the sequence starts on Monday.
    # NOTE(review): the level names are English; this assumes wday() emits
    # English labels in the session locale -- confirm.
    incident_day = factor(
      wday(incident_date, label = TRUE, abbr = FALSE),
      levels = c(
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday"
      )
    )
  ) %>%
  group_by(incident_day) %>%
  summarise(sum_victims_injured = sum(victims_injured)) %>%
  ggplot(
    aes(
      x = incident_day,
      y = sum_victims_injured,
      fill = sum_victims_injured,
      # Custom hover label picked up by ggplotly(tooltip = "text").
      text = paste(
        "Day:", incident_day,
        "\nTotal Victims Injured:", sum_victims_injured
      )
    )
  ) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_gradientn(colors = pal3, name = "Total Victims Injured") +
  labs(
    x = "",
    y = "",
    title = "Total Victims Injured by Day of Week",
    subtitle = "01/01/2023 - 02/12/2023"
  ) +
  coord_flip() +
  theme_minimal() +
  th

# Interactive version. The layout title is set explicitly to the chart title,
# a line break, and the date range wrapped in a (now properly closed) <sup>.
ggplotly(p5, tooltip = "text") %>%
  layout(
    title = list(
      text = paste0(
        "Total Victims Injured by Day of Week",
        "<br>",
        "<sup>",
        "01/01/2023 - 02/12/2023",
        "</sup>"
      )
    )
  )

Victims injured - stripchart, boxplot and density plot

# Three stacked base-graphics views of `victims_injured`: jittered stripchart,
# horizontal boxplot and kernel density. The previous `mfrow` setting is saved
# and restored afterwards so later base plots are not forced into a 3x1 grid.
old_par <- par(mfrow = c(3, 1))
stripchart(
  df2$victims_injured,
  method = "jitter",
  vertical = FALSE,
  cex = 7,
  cex.main = 2,
  cex.axis = 2,
  col = "#CA3242",
  main = "Victims Injured Stripchart"
)
# Disabled alternative to the stripchart, kept for reference:
#beeswarm::beeswarm(df2$victims_injured,method = "hex",horizontal = TRUE,cex = 1,pch = 22,col ="#CA3242",main="Victims Injured Beeswarm Plot",ylab="Frequency",cex.axis=2,cex.main=2,cex.lab=2)
boxplot(
  df2$victims_injured,
  cex = 7,
  cex.axis = 2,
  cex.main = 2,
  col = "#CA3242",
  main = "Victims Injured Boxplot",
  horizontal = TRUE
)
plot(
  density(df2$victims_injured),
  col = "#CA3242",
  cex = 7,
  cex.axis = 2,
  cex.main = 2,
  cex.lab = 2,
  main = "Victims Injured Densityplot",
  lwd = 3
)
par(old_par)

Victims injured - line plots

# Line chart of `victims_injured` against `incident_date`, one point per row
# of df2. Hover text shows the date and the count; the date range is added as
# an annotation at paper coordinates (0, 1).
p7 <- df2 %>%
  plot_ly(
    x = ~incident_date,
    y = ~victims_injured,
    type = "scatter",
    mode = "lines",
    line = list(color = "#CA3242", width = 2),
    text = ~ paste0(
      "Date: ", incident_date, "\n",
      "Victims injured: ", victims_injured
    )
  ) %>%
  layout(
    title = list(text = "Victims Injured", font = list(size = 20), y = 1),
    xaxis = list(title = "", tickfont = list(size = 18)),
    yaxis = list(title = "", tickfont = list(size = 18)),
    showlegend = FALSE
  ) %>%
  add_annotations(
    text = "01/01/2023-02/12/2023",
    showarrow = FALSE,
    xref = "paper",
    yref = "paper",
    x = 0,
    y = 1,
    font = list(size = 18)
  ) %>%
  # Show only the custom text on hover.
  style(hoverinfo = "text")

p7
# Monthly totals of victims injured, keyed by a "YYYY-MM" factor built from
# the first seven characters of the incident date. The weekday/month labels
# and positional column reordering were unused by this summary and are omitted.
df_sum_injured <- df2 %>%
  mutate(
    incident_date2 = as.factor(
      str_sub(as.character(incident_date), start = 1, end = 7)
    )
  ) %>%
  group_by(incident_date2) %>%
  summarise(sum_victims_injured = sum(victims_injured)) %>%
  # Drop the 2023-12 bucket (presumably because that month is incomplete in
  # the data -- TODO confirm).
  filter(incident_date2 != "2023-12")

# Line chart of the monthly totals. `xref`/`yref` are now passed once each to
# add_annotations(); the original supplied both of them twice.
plot_ly(
  data = df_sum_injured,
  x = ~incident_date2,
  y = ~sum_victims_injured,
  type = "scatter",
  mode = "lines",
  line = list(color = "#CA3242", width = 2),
  text = ~ paste0(
    "Date: ", incident_date2, "\n",
    "Total victims injured: ", sum_victims_injured
  )
) %>%
  layout(
    title = list(
      text = "Total Victims Injured",
      font = list(size = 24),
      y = 1
    ),
    xaxis = list(title = "", tickfont = list(size = 18)),
    yaxis = list(title = "", tickfont = list(size = 18)),
    showlegend = FALSE
  ) %>%
  add_annotations(
    text = "01/01/2023 - 02/12/2023",
    showarrow = FALSE,
    xref = "paper",
    yref = "paper",
    x = 0,
    y = 1,
    font = list(size = 16)
  ) %>%
  # Show only the custom text on hover.
  style(hoverinfo = "text")